In [ ]:
from os import path
from collections import OrderedDict

import astropy.coordinates as coord
import astropy.units as u
from astropy.io import fits
from astropy.table import Table
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('apw-notebook')  # custom local matplotlib style; skip this line if unavailable
%matplotlib inline
from scipy.optimize import minimize
from scipy.signal import argrelmin
import emcee
import corner

from comoving_rv.longslit.wavelength import fit_spec_line, fit_spec_line_GP
from comoving_rv.longslit.models import voigt_polynomial

In [ ]:
root_path = path.normpath('../data/mdm-spring-2017/processed/')

In [ ]:
night = 'n3'
frame = 65

In [ ]:
hdr = fits.getheader(path.join(root_path, night, '1d_{}.{:04d}.fit'.format(night, frame)), 0)
coord.SkyCoord(ra=hdr['RA'], dec=hdr['DEC'], unit=(u.hourangle, u.degree))

In [ ]:
# read the wavelength-solution guess (the per-night master_wavelength file is commented out below)
# pix_wav = np.genfromtxt(path.join(root_path, night, 'master_wavelength.csv'),
#                         delimiter=',', names=True)
pix_wav = np.genfromtxt(path.join(root_path, 'wavelength_guess.csv'),
                        delimiter=',', names=True)

idx = (pix_wav['wavelength'] < 6950) & (pix_wav['wavelength'] > 5400)
pix_wav = pix_wav[idx] # HACK
pix_range = [min(pix_wav['pixel']), max(pix_wav['pixel'])]

In [ ]:
spec = Table.read(path.join(root_path, night, '1d_{}.{:04d}.fit'.format(night, frame)))

In [ ]:
plt.plot(spec['pix'][250:1200], spec['source_flux'][250:1200], drawstyle='steps-mid', marker='')
plt.xlim(1200, 250)

In [ ]:
coef = np.polynomial.polynomial.polyfit(pix_wav['pixel'], pix_wav['wavelength'],
                                        deg=4)

# compute wavelength array for the pixels
wave = np.polynomial.polynomial.polyval(spec['pix'], coef)
wave[(spec['pix'] > max(pix_range)) | (spec['pix'] < min(pix_range))] = np.nan

# residuals of the wavelength solution at the line positions
plt.scatter(pix_wav['pixel'], np.polynomial.polynomial.polyval(pix_wav['pixel'], coef) - pix_wav['wavelength'])
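
The residual scatter gives a quick feel for the quality of the wavelength solution. A minimal numeric check (a sketch: the km/s figure assumes a reference wavelength of ~6000 Angstrom and c ~ 3e5 km/s):

In [ ]:
# RMS of the wavelength-solution residuals at the line positions
resid = np.polynomial.polynomial.polyval(pix_wav['pixel'], coef) - pix_wav['wavelength']
print('residual RMS: {:.3f} Angstrom'.format(np.std(resid)))
print('~{:.2f} km/s at 6000 Angstrom'.format(np.std(resid) / 6000. * 3e5))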

Try leave-one-out cross-validation to choose the polynomial degree for the wavelength solution


In [ ]:
from sklearn.model_selection import LeaveOneOut

In [ ]:
cv_instance = LeaveOneOut()
x,y = pix_wav['pixel'],pix_wav['wavelength']
poly_fit_func = np.polynomial.polynomial.polyfit
poly_val_func = np.polynomial.polynomial.polyval
poly_deg = 5

In [ ]:
def cv_score(poly_deg, x, y, poly_fit_func, poly_val_func, cv_instance):
    # return the per-point squared errors from cross-validation for a
    # polynomial fit of the given degree
    mse = []
    for train, test in cv_instance.split(x, y):
        coef = poly_fit_func(x[train], y[train], deg=poly_deg) # w=ivar,
        pred = poly_val_func(x[test], coef)
        mse.append((y[test] - pred)**2)

    return np.squeeze(np.array(mse))

In [ ]:
pix_wav['wavelength'][:-1]

In [ ]:
# other polynomial bases one could swap in here:
#   np.polynomial.chebyshev.chebfit / chebval
#   np.polynomial.hermite.hermfit / hermval
poly_fit_func, poly_val_func = np.polynomial.polynomial.polyfit, np.polynomial.polynomial.polyval
args = (pix_wav['pixel'][:-1], pix_wav['wavelength'][:-1], 
        poly_fit_func, poly_val_func, LeaveOneOut())

degs = np.arange(1, 11+1, 1)
scores = np.zeros((len(degs), len(args[0])))
for i,deg in enumerate(degs):
    scores[i] = cv_score(deg, *args)

plt.plot(degs, scores.mean(axis=1))

plt.yscale('log')
plt.xlabel('polynomial degree')
plt.ylabel('cross validation score')
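
Pick the degree with the smallest mean cross-validation error (a minimal sketch, assuming mean squared error is the selection criterion):

In [ ]:
# degree with the smallest mean leave-one-out error
best_deg = degs[np.argmin(scores.mean(axis=1))]
best_deg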

In [ ]:
scores.shape

In [ ]:
plt.plot(scores[2])  # per-point CV errors for degree degs[2]


In [ ]:
plt.scatter(pix_wav['pixel'], pix_wav['wavelength'])

_grid = np.linspace(pix_wav['pixel'].min(), pix_wav['pixel'].max(), 256)
plt.plot(_grid, np.polynomial.polynomial.polyval(_grid, coef), 
         marker='', linestyle='-', alpha=0.4)

In [ ]:
plt.plot(np.abs(coef), linestyle='none', marker='o')
plt.yscale('log')

Define the data arrays (wavelength, flux, inverse variance) used in the line fits below


In [ ]:
# H alpha
# near_Ha = (wave > 6510) & (wave < 6615)
# flux_data = spec['source_flux'][near_Ha]
# ivar_data = spec['source_ivar'][near_Ha]
# absorp_emiss = -1.
# target_x = 6562.8

# O2 6867
# near_Ha = (wave > 6767) & (wave < 6929) 
# target_x = 6867.
# absorp_emiss = -1.
# flux_data = spec['source_flux'][near_Ha]
# ivar_data = spec['source_ivar'][near_Ha]

# SKY LINE (the mask is still named `near_Ha` for convenience)
near_Ha = (wave > 5477) & (wave < 5677) # [OI] 5577
target_x = 5577.
# near_Ha = (wave > 6200) & (wave < 6400) # [OI] 6300
# target_x = 6300.
absorp_emiss = 1.
flux_data = spec['background_flux'][near_Ha]
ivar_data = spec['background_ivar'][near_Ha]

wave_data = wave[near_Ha]

_idx = wave_data.argsort()
wave_data = wave_data[_idx]
flux_data = flux_data[_idx]
ivar_data = ivar_data[_idx]
err_data = 1/np.sqrt(ivar_data)

In [ ]:
pars = fit_spec_line(wave_data, flux_data, ivar_data,
                     absorp_emiss=absorp_emiss, target_x=target_x, 
                     fwhm_L0=4., std_G0=0.1, n_bg_coef=2)
print(pars['x0'])

# plot that ish
plt.plot(wave_data, flux_data, drawstyle='steps-mid', marker='')

wave_grid = np.linspace(wave_data.min(), wave_data.max(), 256)
plt.plot(wave_grid, voigt_polynomial(wave_grid, **pars), marker='', alpha=0.5)
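
As a quick check on this simple (non-GP) fit, look at the normalized residuals (a sketch using only quantities defined above):

In [ ]:
# normalized residuals of the Voigt + polynomial fit
resid = (flux_data - voigt_polynomial(wave_data, **pars)) * np.sqrt(ivar_data)
plt.plot(wave_data, resid, drawstyle='steps-mid', marker='')
plt.axhline(0., color='#666666', zorder=-10)
plt.ylabel('normalized residual')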

In [ ]:
gp = fit_spec_line_GP(wave_data, flux_data, ivar_data,
                      absorp_emiss=absorp_emiss, target_x=target_x, 
                      fwhm_L0=4., std_G0=1., n_bg_coef=2)

In [ ]:
gp.get_parameter_dict()

In [ ]:
def get_fit_pars(gp):
    # convert the GP mean-model parameters back into the keyword arguments
    # expected by voigt_polynomial (exponentiating the ln_* parameters)
    fit_pars = OrderedDict()
    for k,v in gp.get_parameter_dict().items():
        if 'mean' not in k:
            continue

        k = k[5:] # remove 'mean:'
        if k.startswith('ln'):
            if 'amp' in k:
                fit_pars[k[3:]] = -np.exp(v)
            else:
                fit_pars[k[3:]] = np.exp(v)

        elif k.startswith('bg'):
            if 'bg_coef' not in fit_pars:
                fit_pars['bg_coef'] = []
            fit_pars['bg_coef'].append(v)

        else:
            fit_pars[k] = v
    
    if 'std_G' not in fit_pars:
        fit_pars['std_G'] = 1E-10
        
    return fit_pars

In [ ]:
fit_pars = get_fit_pars(gp)
fit_pars

In [ ]:
# Make the maximum likelihood prediction
mu, var = gp.predict(flux_data, wave_grid, return_var=True)
std = np.sqrt(var)

# data
plt.plot(wave_data, flux_data, drawstyle='steps-mid', marker='')
plt.errorbar(wave_data, flux_data, err_data,
             marker='', ls='none', ecolor='#666666', zorder=-10)

# mean model
plt.plot(wave_grid, voigt_polynomial(wave_grid, **fit_pars), marker='', alpha=0.5)

# full GP model
gp_color = "#ff7f0e"
plt.plot(wave_grid, mu, color=gp_color, marker='')
plt.fill_between(wave_grid, mu+std, mu-std, color=gp_color, alpha=0.3, edgecolor="none")
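
A similar residual check for the full GP model, evaluated at the data points (a sketch reusing gp.predict with return_cov=False, as in the sampling cells below):

In [ ]:
# normalized residuals of the full (mean + GP) model at the observed wavelengths
mu_data = gp.predict(flux_data, wave_data, return_cov=False)
plt.plot(wave_data, (flux_data - mu_data) * np.sqrt(ivar_data), drawstyle='steps-mid', marker='')
plt.axhline(0., color='#666666', zorder=-10)
plt.ylabel('normalized residual')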

Run emcee to sample over the GP model parameters instead of relying only on the maximum-likelihood fit:


In [ ]:
def log_probability(params):
    gp.set_parameter_vector(params)
    lp = gp.log_prior()
    if not np.isfinite(lp):
        return -np.inf
    
#     if params[1] < -5:
#         return -np.inf
    # HACK: Gaussian prior (mean 1, variance 1) on the second parameter
    var = 1.
    lp += -0.5*(params[1]-1)**2/var - 0.5*np.log(2*np.pi*var)

    # hard lower bounds on two of the (log) parameters
    if params[4] < -10. or params[5] < -10.:
        return -np.inf
    
    ll = gp.log_likelihood(flux_data)
    if not np.isfinite(ll):
        return -np.inf
    
    return ll + lp
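
Before sampling, a minimal sanity check that the log-probability is finite at the current parameter vector (it just calls the function defined above):

In [ ]:
# should be a finite number; -inf means the prior or hard cuts reject the starting point
log_probability(gp.get_parameter_vector())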

In [ ]:
if fit_pars['std_G'] < 1E-2:
    gp.freeze_parameter('mean:ln_std_G')

In [ ]:
initial = np.array(gp.get_parameter_vector())
# nudge parameters away from the hard cuts in log_probability so the
# walkers don't start in a zero-probability region
if initial[4] < -10:
    initial[4] = -8.
if initial[5] < -10:
    initial[5] = -8.
ndim, nwalkers = len(initial), 64
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability)

print("Running burn-in...")
p0 = initial + 1e-6 * np.random.randn(nwalkers, ndim)
p0, lp, _ = sampler.run_mcmc(p0, 256)

print("Running 2nd burn-in...")
sampler.reset()
p0 = p0[lp.argmax()] + 1e-3 * np.random.randn(nwalkers, ndim)
p0, lp, _ = sampler.run_mcmc(p0, 512)

print("Running production...")
sampler.reset()
pos, lp, _ = sampler.run_mcmc(p0, 512);
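
A rough convergence diagnostic (a sketch; acceptance_fraction is an attribute of emcee's EnsembleSampler):

In [ ]:
# mean acceptance fraction across walkers; values around ~0.2-0.5 are typically considered healthy
sampler.acceptance_fraction.mean()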

In [ ]:
fig, axes = plt.subplots(2, 4, figsize=(18, 6))
for i in range(sampler.dim):
    for walker in sampler.chain[..., i]:
        axes.flat[i].plot(walker, marker='', drawstyle='steps-mid', alpha=0.2)
    axes.flat[i].set_title(gp.get_parameter_names()[i], fontsize=12)
fig.tight_layout()

In [ ]:
fig,axes = plt.subplots(3, 1, figsize=(6,9), sharex=True)

# plot some samples
samples = sampler.flatchain
for s in samples[np.random.randint(len(samples), size=32)]:
    gp.set_parameter_vector(s)
    
    fit_pars = get_fit_pars(gp)
    _mean_model = voigt_polynomial(wave_grid, **fit_pars)
    axes[0].plot(wave_grid, _mean_model, 
                 marker='', alpha=0.25, color='#3182bd', zorder=-10)
    
    mu = gp.predict(flux_data, wave_grid, return_cov=False)
    axes[1].plot(wave_grid, mu-_mean_model, color=gp_color, alpha=0.25, marker='')
    axes[2].plot(wave_grid, mu, color='#756bb1', alpha=0.25, marker='')
    
axes[2].plot(wave_data, flux_data, drawstyle='steps-mid', marker='', zorder=-6)
axes[2].errorbar(wave_data, flux_data, err_data,
                 marker='', ls='none', ecolor='#666666', zorder=-10)

axes[2].set_ylabel('flux')
axes[2].set_xlabel(r'wavelength [$\AA$]')
axes[0].set_title('mean model (voigt + poly.)')
axes[1].set_title('noise model (GP)')
axes[2].set_title('full model')

fig.tight_layout()
fig.savefig('/Users/adrian/Downloads/spec_model_demo.png', dpi=256)

In [ ]:
corner.corner(sampler.flatchain[::10, :], 
              labels=[x.split(':')[1] for x in gp.get_parameter_names()]);

In [ ]:
Halpha = 6562.8  # Angstrom, rest wavelength of H-alpha

In [ ]:
# robust scatter of the fitted line center: 1.48 * MAD ~ Gaussian sigma,
# converted to a velocity using c ~ 3e5 km/s (relative to the H-alpha rest wavelength)
MAD = np.median(np.abs(sampler.flatchain[:, 3] - np.median(sampler.flatchain[:, 3])))
v_precision = 1.48 * MAD / Halpha * 300000. * u.km/u.s
v_precision

In [ ]:
np.median(sampler.flatchain[:, 3])

In [ ]:
# line-center offset as a velocity (only meaningful when the fitted line is H-alpha)
(np.median(sampler.flatchain[:, 3]) - Halpha) / Halpha * 300000. * u.km/u.s
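
The same conversion with the exact speed of light from astropy.constants, as a sketch (again, only meaningful when the fitted line is H-alpha):

In [ ]:
import astropy.constants as const
((np.median(sampler.flatchain[:, 3]) - Halpha) / Halpha * const.c).to(u.km/u.s)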
